<!DOCTYPE html>



  


<html class="theme-next pisces use-motion" lang="zh-Hans">
<head>
  <meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<!-- No maximum-scale here: capping the zoom level prevents users from enlarging the page (WCAG 1.4.4 Resize Text). -->
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="theme-color" content="#222">


<!-- Search-engine ownership tokens: exactly one tag per engine, each carrying the real token. A tag whose content is the literal string "true" is not a token and must not be emitted. -->
<meta name="google-site-verification" content="E9deYnivN5MuHMuIfiMZZfS0alv-d_0UjcwjBL79lGU">
<meta name="baidu-site-verification" content="iHYWJxscwD">

<!-- Ask transcoding proxies not to rewrite the page (no-siteapp is presumably aimed at Baidu's mobile transcoder; confirm before removing). -->
<meta http-equiv="Cache-Control" content="no-transform">
<meta http-equiv="Cache-Control" content="no-siteapp">







  
  
  <!-- Stylesheets. Order matters: vendor CSS first, then the theme's main.css so it can override them. -->
  <link rel="stylesheet" href="/lib/fancybox/source/jquery.fancybox.css?v=2.1.5">
  <link rel="stylesheet" href="/lib/font-awesome/css/font-awesome.min.css?v=4.6.2">
  <link rel="stylesheet" href="/css/main.css?v=5.1.4">

  <!-- Favicons and platform icons. -->
  <link rel="apple-touch-icon" href="/images/apple-touch-icon-next.png?v=5.1.4" sizes="180x180">
  <link rel="icon" href="/images/favicon-32x32-next.png?v=5.1.4" type="image/png" sizes="32x32">
  <link rel="icon" href="/images/favicon-16x16-next.png?v=5.1.4" type="image/png" sizes="16x16">
  <link rel="mask-icon" href="/images/logo.svg?v=5.1.4" color="#222">





  <!-- Comma-separated post tags; no trailing comma, which would add an empty keyword. -->
  <meta name="keywords" content="Python,量化投资,时间序列分析,kaggle">










<!-- Page summary, reused verbatim for the Open Graph description below. -->
<meta name="description" content="关于时间序列你所能做的一切Siddharth Yadav翻译自https:&#x2F;&#x2F;www.kaggle.com&#x2F;thebrownviking20&#x2F;everything-you-can-do-with-a-time-series数据文件也在上面链接里。或者上我的github代码库:https:&#x2F;&#x2F;github.com&#x2F;zwdnet&#x2F;MyQuant&#x2F;tree&#x2F;master&#x2F;11目标从我注册这个平台的第一周">
<meta property="og:type" content="article">
<meta property="og:title" content="量化投资学习笔记11——关于时间序列你所能做的一切">
<!-- og:url is the directory-style URL (no index.html) so it names the same resource as the canonical link. -->
<meta property="og:url" content="https://zwdnet.github.io/2020/03/07/%E9%87%8F%E5%8C%96%E6%8A%95%E8%B5%84%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B011%E2%80%94%E2%80%94%E5%85%B3%E4%BA%8E%E6%97%B6%E9%97%B4%E5%BA%8F%E5%88%97%E4%BD%A0%E6%89%80%E8%83%BD%E5%81%9A%E7%9A%84%E4%B8%80%E5%88%87/">
<meta property="og:site_name" content="赵瑜敏的口腔医学专业学习博客">
<meta property="og:description" content="关于时间序列你所能做的一切Siddharth Yadav翻译自https:&#x2F;&#x2F;www.kaggle.com&#x2F;thebrownviking20&#x2F;everything-you-can-do-with-a-time-series数据文件也在上面链接里。或者上我的github代码库:https:&#x2F;&#x2F;github.com&#x2F;zwdnet&#x2F;MyQuant&#x2F;tree&#x2F;master&#x2F;11目标从我注册这个平台的第一周">
<!-- og:locale needs a language_TERRITORY value; zh_CN corresponds to the page's lang="zh-Hans". -->
<meta property="og:locale" content="zh_CN">
<!-- Open Graph preview images: the numbered blog0178-QTLearn/07/NN.png figures (01 through 61) in order, followed by other/wx.jpg. -->
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/01.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/02.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/03.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/04.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/05.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/06.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/07.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/08.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/09.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/10.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/11.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/12.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/13.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/14.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/15.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/16.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/17.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/18.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/19.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/20.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/21.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/22.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/23.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/24.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/25.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/26.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/27.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/28.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/29.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/30.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/31.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/32.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/33.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/34.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/35.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/36.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/37.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/38.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/39.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/40.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/41.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/42.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/43.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/44.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/45.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/46.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/47.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/48.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/49.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/50.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/51.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/52.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/53.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/54.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/55.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/56.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/57.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/58.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/59.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/60.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/61.png">
<meta property="og:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/other/wx.jpg">
<!-- Article metadata: publication and modification timestamps, author, and one article:tag per post tag. -->
<meta property="article:published_time" content="2020-03-07T03:08:16.000Z">
<meta property="article:modified_time" content="2020-08-30T05:52:04.000Z">
<meta property="article:author" content="赵瑜敏">
<meta property="article:tag" content="Python">
<meta property="article:tag" content="量化投资">
<meta property="article:tag" content="时间序列分析">
<meta property="article:tag" content="kaggle">
<!-- Twitter card: "summary" layout, using the first numbered figure (01.png) as its image. -->
<meta name="twitter:card" content="summary">
<meta name="twitter:image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/01.png">



<script id="hexo.configurations">
  // Page-level settings object, presumably consumed by the theme scripts loaded later.
  // Both names stay declared with `var` so they remain properties of `window`.
  // NOTE(review): `root` is an empty string here; confirm that is the intended site root.
  var NexT = window.NexT || {};
  var CONFIG = {
    root: "",
    scheme: "Pisces",
    version: "5.1.4",
    sidebar: {
      position: "left",
      display: "post",
      offset: 12,
      b2t: false,
      scrollpercent: false,
      onmobile: false
    },
    fancybox: true,
    tabs: true,
    motion: {
      enable: true,
      async: false,
      transition: {
        post_block: "fadeIn",
        post_header: "slideDownIn",
        post_body: "slideDownIn",
        coll_header: "slideLeftIn",
        sidebar: "slideUpIn"
      }
    },
    duoshuo: {
      userId: "0",
      author: "博主"
    },
    algolia: {
      applicationID: "",
      apiKey: "",
      indexName: "",
      hits: { per_page: 10 },
      labels: {
        input_placeholder: "Search for Posts",
        hits_empty: "We didn't find any results for the search: ${query}",
        hits_stats: "${hits} results found in ${time} ms"
      }
    }
  };
</script>



  <link rel="canonical" href="https://zwdnet.github.io/2020/03/07/量化投资学习笔记11——关于时间序列你所能做的一切/"/>





  <title>量化投资学习笔记11——关于时间序列你所能做的一切 | 赵瑜敏的口腔医学专业学习博客</title>
  








<meta name="generator" content="Hexo 5.4.0"></head>

<body itemscope itemtype="http://schema.org/WebPage" lang="zh-Hans">

  
  
    
  

  <div class="container sidebar-position-left page-post-detail">
    <div class="headband"></div>

    <header id="header" class="header" itemscope itemtype="http://schema.org/WPHeader">
      <div class="header-inner"><div class="site-brand-wrapper">
  <!-- Site branding: the title links back to the home page; the subtitle slot is present but empty. -->
  <div class="site-meta">
    <div class="custom-logo-site-title">
      <a class="brand" href="/" rel="start">
        <span class="logo-line-before"><i></i></span>
        <span class="site-title">赵瑜敏的口腔医学专业学习博客</span>
        <span class="logo-line-after"><i></i></span>
      </a>
    </div>
    <p class="site-subtitle"></p>
  </div>

  <div class="site-nav-toggle">
    <!-- Icon-only menu toggle: aria-label supplies the accessible name ("toggle navigation bar"), and type="button" keeps it from ever acting as a submit button. -->
    <button type="button" aria-label="切换导航栏">
      <span class="btn-bar"></span>
      <span class="btn-bar"></span>
      <span class="btn-bar"></span>
    </button>
  </div>
</div>

<nav class="site-nav">
  <!-- Primary site menu. Each href is the bare section path: a trailing "%20" (an encoded space) would name a different URL than the section index. Icons are decorative, so they are hidden from assistive technology; the link text carries the name. -->
  <ul id="menu" class="menu">
    <li class="menu-item menu-item-home">
      <a href="/" rel="section">
        <i class="menu-item-icon fa fa-fw fa-home" aria-hidden="true"></i> <br>
        首页
      </a>
    </li>
    <li class="menu-item menu-item-tags">
      <a href="/tags/" rel="section">
        <i class="menu-item-icon fa fa-fw fa-tags" aria-hidden="true"></i> <br>
        标签
      </a>
    </li>
    <li class="menu-item menu-item-categories">
      <a href="/categories/" rel="section">
        <i class="menu-item-icon fa fa-fw fa-th" aria-hidden="true"></i> <br>
        分类
      </a>
    </li>
    <li class="menu-item menu-item-archives">
      <a href="/archives/" rel="section">
        <i class="menu-item-icon fa fa-fw fa-archive" aria-hidden="true"></i> <br>
        归档
      </a>
    </li>
  </ul>
</nav>



 </div>
    </header>

    <main id="main" class="main">
      <div class="main-inner">
        <div class="content-wrap">
          <div id="content" class="content">
            

  <div id="posts" class="posts-expand">
    

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="https://zwdnet.github.io/2020/03/07/%E9%87%8F%E5%8C%96%E6%8A%95%E8%B5%84%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B011%E2%80%94%E2%80%94%E5%85%B3%E4%BA%8E%E6%97%B6%E9%97%B4%E5%BA%8F%E5%88%97%E4%BD%A0%E6%89%80%E8%83%BD%E5%81%9A%E7%9A%84%E4%B8%80%E5%88%87/">

    <!-- Hidden schema.org author record. The name is the same value as the article:author meta tag in the head; an empty name leaves the Person item unusable. -->
    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="赵瑜敏">
      <meta itemprop="description" content="">
      <meta itemprop="image" content="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/other/tx.jpg">
    </span>

    <!-- Hidden schema.org publisher record: the site itself. -->
    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="赵瑜敏的口腔医学专业学习博客">
    </span>

    
      <header class="post-header">
        <!-- Post title; also exposed as the microdata name and headline. -->
        <h1 class="post-title" itemprop="name headline">量化投资学习笔记11——关于时间序列你所能做的一切</h1>

        <div class="post-meta">
          <!-- Publication date -->
          <span class="post-time">
            <span class="post-meta-item-icon">
              <i class="fa fa-calendar-o"></i>
            </span>
            <span class="post-meta-item-text">发表于</span>
            <time title="创建于" itemprop="dateCreated datePublished" datetime="2020-03-07T03:08:16+00:00">
              2020-03-07
            </time>
          </span>

          <!-- Category -->
          <span class="post-category">
            <span class="post-meta-divider">|</span>
            <span class="post-meta-item-icon">
              <i class="fa fa-folder-o"></i>
            </span>
            <span class="post-meta-item-text">分类于</span>
            <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
              <a href="/categories/%E9%87%8F%E5%8C%96%E6%8A%95%E8%B5%84/" itemprop="url" rel="index">
                <span itemprop="name">量化投资</span>
              </a>
            </span>
          </span>

          <!-- Word count and estimated reading time -->
          <div class="post-wordcount">
            <span class="post-meta-divider">|</span>
            <span class="post-meta-item-icon">
              <i class="fa fa-file-word-o"></i>
            </span>
            <span class="post-meta-item-text">字数统计:</span>
            <span title="字数统计">
              8.2k
            </span>
            <span class="post-meta-divider">|</span>
            <span class="post-meta-item-icon">
              <i class="fa fa-clock-o"></i>
            </span>
            <span class="post-meta-item-text">阅读时长 ≈</span>
            <span title="阅读时长">
              37
            </span>
          </div>
        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        <p>关于时间序列你所能做的一切<br>Siddharth Yadav<br>翻译自<a target="_blank" rel="noopener" href="https://www.kaggle.com/thebrownviking20/everything-you-can-do-with-a-time-series">https://www.kaggle.com/thebrownviking20/everything-you-can-do-with-a-time-series</a><br>数据文件也在上面链接里。或者上我的GitHub代码库:<a target="_blank" rel="noopener" href="https://github.com/zwdnet/MyQuant/tree/master/11">https://github.com/zwdnet/MyQuant/tree/master/11</a><br>目标<br>从我注册这个平台的第一周起，我就被时间序列分析这个主题给迷住了。本文是关于时间序列分析的许多广泛的话题的一个集合体。我写作本文的目的是为时间序列分析初学者和有经验的人提供一个基本的参考。<br>一些重要的事情<br>1.本教程还在完成中，所以你每次打开它都有可能会发现有更新的内容。<br>2.我在写这篇教程时已经学习过很多这个领域的课程，我还在继续学习更多的更高级的课程以获得更多的知识和内容。<br>3.如果您有任何建议或者有任何主题希望本教程覆盖，请在评论区留言。<br>4.如果您欣赏本文，请一定点赞（按喜欢按钮）。这样它能对社区有更大的意义和帮助。<br>首先导入相关的库</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># Importing libraries</span></span><br><span class="line"><span class="keyword">import</span> os</span><br><span class="line"><span class="keyword">import</span> warnings</span><br><span class="line">warnings.filterwarnings(<span class="string">&#x27;ignore&#x27;</span>)</span><br><span class="line"><span class="keyword">import</span> numpy <span class="keyword">as</span> np</span><br><span class="line"><span class="keyword">import</span> pandas <span class="keyword">as</span> pd</span><br><span class="line"><span class="keyword">import</span> matplotlib.pyplot <span class="keyword">as</span> plt</span><br><span class="line">plt.style.use(<span class="string">&#x27;fivethirtyeight&#x27;</span>)</span><br><span class="line"><span class="comment"># Above is a special style template for matplotlib, highly useful for visualizing time series data</span></span><br><span class="line">%matplotlib inline</span><br><span class="line"><span 
class="keyword">from</span> pylab <span class="keyword">import</span> rcParams</span><br><span class="line"><span class="keyword">from</span> plotly <span class="keyword">import</span> tools</span><br><span class="line"><span class="keyword">import</span> plotly.plotly <span class="keyword">as</span> py</span><br><span class="line"><span class="keyword">from</span> plotly.offline <span class="keyword">import</span> init_notebook_mode, iplot</span><br><span class="line">init_notebook_mode(connected=<span class="literal">True</span>)</span><br><span class="line"><span class="keyword">import</span> plotly.graph_objs <span class="keyword">as</span> go</span><br><span class="line"><span class="keyword">import</span> plotly.figure_factory <span class="keyword">as</span> ff</span><br><span class="line"><span class="keyword">import</span> statsmodels.api <span class="keyword">as</span> sm</span><br><span class="line"><span class="keyword">from</span> numpy.random <span class="keyword">import</span> normal, seed</span><br><span class="line"><span class="keyword">from</span> scipy.stats <span class="keyword">import</span> norm</span><br><span class="line"><span class="keyword">from</span> statsmodels.tsa.arima_model <span class="keyword">import</span> ARMA</span><br><span class="line"><span class="keyword">from</span> statsmodels.tsa.stattools <span class="keyword">import</span> adfuller</span><br><span class="line"><span class="keyword">from</span> statsmodels.graphics.tsaplots <span class="keyword">import</span> plot_acf, plot_pacf</span><br><span class="line"><span class="keyword">from</span> statsmodels.tsa.arima_process <span class="keyword">import</span> ArmaProcess</span><br><span class="line"><span class="keyword">from</span> statsmodels.tsa.arima_model <span class="keyword">import</span> ARIMA</span><br><span class="line"><span class="keyword">import</span> math</span><br><span class="line"><span class="keyword">from</span> sklearn.metrics <span 
class="keyword">import</span> mean_squared_error</span><br><span class="line">print(os.listdir(<span class="string">&quot;../input&quot;</span>))</span><br></pre></td></tr></table></figure>
<p>输出</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">[<span class="string">&#x27;historical-hourly-weather-data&#x27;</span>, <span class="string">&#x27;stock-time-series-20050101-to-20171231&#x27;</span>]</span><br></pre></td></tr></table></figure>
<p>目录<br>1.介绍日期与时间<br>1.1 导入时间序列数据<br>1.2 时间序列数据的清洗与准备<br>1.3 数据的可视化<br>1.4 时间戳和周期<br>1.5 使用date_range<br>1.6 使用to_datetime<br>1.7 转换与延迟(shifting and lags)<br>1.8 重取样<br>2.金融与统计学<br>2.1 改变的百分率<br>2.2 证券收益<br>2.3 相继列的绝对改变(Absolute change in successive rows)<br>2.4 比较两个或更多的时间序列<br>2.5 窗口函数<br>2.6 OHLC图<br>2.7 蜡烛图<br>2.8 自相关与部分自相关<br>3.时间序列分解与随机行走<br>3.1 趋势、季节性和噪音<br>3.2 白噪音<br>3.3 随机行走<br>3.4 稳定性(Stationarity)<br>4.使用statsmodels建模<br>4.1 AR模型<br>4.2 MA模型<br>4.3 ARMA模型<br>4.4 ARIMA模型<br>4.5 VAR模型<br>4.6 状态空间模型<br>4.6.1 SARIMA模型<br>4.6.2 未观察到的部分(Unobserved components)<br>4.6.3 动态因子模型</p>
<p>1.介绍日期与时间<br>1.1 导入时间序列数据<br>如何导入数据？<br>首先，我们导入本教程需要的所有数据集。所需的时间序列列通过parse_dates参数解析为日期时间类型导入，并通过index_col参数设为DataFrame的索引。<br>我们将使用的数据包括:<br>1.谷歌股票数据<br>2.世界各个城市的湿度数据<br>3.微软股票数据<br>4.世界各个城市的气压数据</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 导入数据</span></span><br><span class="line"> google = pd.read_csv(<span class="string">&quot;input/stock-time-series-20050101-to-20171231/GOOGL_2006-01-01_to_2018-01-01.csv&quot;</span>, index_col = <span class="string">&quot;Date&quot;</span>, parse_dates = [<span class="string">&quot;Date&quot;</span>])</span><br><span class="line"> print(google.head())</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/01.png"></p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">humidity = pd.read_csv(<span class="string">&quot;input/historical-hourly-weather-data/humidity.csv&quot;</span>, index_col = <span class="string">&quot;datetime&quot;</span>, parse_dates = [<span class="string">&#x27;datetime&#x27;</span>])</span><br><span class="line"> print(humidity.tail())</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/02.png"><br>1.2 时间序列数据的清洗与准备<br>如何准备数据?<br>谷歌股票数据没有缺失项，而湿度数据有缺失数据。使用fillna()方法，其ffill参数采用最近的有效观测值来填充缺失值。</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 填充缺失值</span></span><br><span class="line"> humidity = humidity.iloc[<span class="number">1</span>:]</span><br><span class="line"> humidity = humidity.fillna(method = <span class="string">&quot;ffill&quot;</span>)</span><br><span class="line"> print(humidity.head())</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/03.png"><br>1.3 数据集的可视化</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 数据集的可视化</span></span><br><span class="line"> fig = plt.figure()</span><br><span class="line"> humidity[<span class="string">&quot;Kansas City&quot;</span>].asfreq(<span class="string">&quot;M&quot;</span>).plot()</span><br><span class="line"> plt.title(<span class="string">&quot;Humidity in Kansas City over time(Monthly frequency)&quot;</span>)</span><br><span class="line"> fig.savefig(<span class="string">&quot;Kansas_humidity.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/04.png"></p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><span class="line">fig = plt.figure()</span><br><span class="line">google[<span class="string">&quot;2008&quot;</span>:<span class="string">&quot;2010&quot;</span>].plot(subplots = <span class="literal">True</span>, figsize = (<span class="number">10</span>, <span class="number">12</span>))</span><br><span class="line">plt.title(<span class="string">&quot;Google stocks from 2008 to 2010&quot;</span>)</span><br><span class="line">plt.savefig(<span class="string">&quot;google.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/05.png"><br>1.4 时间戳和周期<br>什么是时间戳和周期?它们如何有用?<br>时间戳是用来代表时间中的一个点。周期是时间的一段间隔。周期可以用来检查某一特定事件是否发生在给定的期间内。它们也可以被转化为其它形式。</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 时间戳</span></span><br><span class="line"> timestamp = pd.Timestamp(<span class="number">2017</span>, <span class="number">1</span>, <span class="number">1</span>, <span class="number">12</span>)</span><br><span class="line"> print(timestamp)</span><br></pre></td></tr></table></figure>
<p>2017-01-01 12:00:00</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 建立一个周期</span></span><br><span class="line"> period = pd.Period(<span class="string">&quot;2017-01-01&quot;</span>)</span><br><span class="line"> print(period)</span><br></pre></td></tr></table></figure>
<p>2017-01-01</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 检查一个给定的时间戳是否在一个给定的时间周期中</span></span><br><span class="line"> print(period.start_time &lt; timestamp &lt; period.end_time)</span><br></pre></td></tr></table></figure>
<p>True</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 将时间戳转换为周期</span></span><br><span class="line"> new_period = timestamp.to_period(freq = <span class="string">&quot;H&quot;</span>)</span><br><span class="line"> print(new_period)</span><br></pre></td></tr></table></figure>
<p>2017-01-01 12:00</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 将周期转换为时间戳</span></span><br><span class="line"> new_timestamp = period.to_timestamp(freq = <span class="string">&quot;H&quot;</span>, how = <span class="string">&quot;start&quot;</span>)</span><br><span class="line"> print(new_timestamp)</span><br></pre></td></tr></table></figure>
<p>2017-01-01 00:00:00<br>1.5 使用date_range<br>什么是date_range以及其为何那么有用?<br>date_range是一个返回固定频率的日期时间的方法。在你基于一个已经存在的数据序列建立时间序列数据，或者重新安排整个时间序列数据时它非常有用。</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 以每天的频率建立一个时间日期索引</span></span><br><span class="line"> dr1 = pd.date_range(start = <span class="string">&quot;1/1/18&quot;</span>, end = <span class="string">&quot;1/9/18&quot;</span>)</span><br><span class="line"> print(dr1)</span><br></pre></td></tr></table></figure>
<p>DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04', '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08', '2018-01-09'], dtype='datetime64[ns]', freq='D')</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 以每月的频率建立一个时间日期索引</span></span><br><span class="line"> dr2 = pd.date_range(start = <span class="string">&quot;1/1/18&quot;</span>, end = <span class="string">&quot;1/1/19&quot;</span>, freq = <span class="string">&quot;M&quot;</span>)</span><br><span class="line"> print(dr2)</span><br></pre></td></tr></table></figure>
<p>DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31', '2018-04-30', '2018-05-31', '2018-06-30', '2018-07-31', '2018-08-31', '2018-09-30', '2018-10-31', '2018-11-30', '2018-12-31'], dtype='datetime64[ns]', freq='M')</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 不设置日期起点，设定终点和周期</span></span><br><span class="line"> dr3 = pd.date_range(end = <span class="string">&quot;1/4/2014&quot;</span>, periods = <span class="number">8</span>)</span><br><span class="line"> print(dr3)</span><br></pre></td></tr></table></figure>
<p>DatetimeIndex(['2013-12-28', '2013-12-29', '2013-12-30', '2013-12-31', '2014-01-01', '2014-01-02', '2014-01-03', '2014-01-04'], dtype='datetime64[ns]', freq='D')</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 指定起止日期和周期</span></span><br><span class="line">dr4 = pd.date_range(start = <span class="string">&quot;2013-04-24&quot;</span>, end = <span class="string">&quot;2014-11-27&quot;</span>, periods = <span class="number">3</span>)</span><br><span class="line">print(dr4)</span><br></pre></td></tr></table></figure>
<p>DatetimeIndex([‘2013-04-24’, ‘2014-02-09’, ‘2014-11-27’], dtype=’datetime64[ns]’, freq=None)<br>1.6 使用to_datetime<br>pandas.to_datetime()用来将变量转换为datetime变量。这里，将一个DataFrame转换为datetime序列。</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 使用to_datetime</span></span><br><span class="line"> df = pd.DataFrame(&#123;</span><br><span class="line"> <span class="string">&quot;year&quot;</span> : [<span class="number">2015</span>, <span class="number">2016</span>],</span><br><span class="line"> <span class="string">&quot;month&quot;</span> : [<span class="number">2</span>, <span class="number">3</span>],</span><br><span class="line"> <span class="string">&quot;day&quot;</span> : [<span class="number">4</span>, <span class="number">5</span>]</span><br><span class="line"> &#125;)</span><br><span class="line"> print(df)</span><br><span class="line"> df = pd.to_datetime(df)</span><br><span class="line"> print(df)</span><br><span class="line"> df = pd.to_datetime(<span class="string">&quot;01-01-2017&quot;</span>)</span><br><span class="line"> print(df)</span><br></pre></td></tr></table></figure>
<p>year month day<br>0 2015 2 4<br>1 2016 3 5<br>0 2015-02-04<br>1 2016-03-05<br>dtype: datetime64[ns]<br> 2017-01-01 00:00:00</p>
<p>1.7 变换和延迟<br>我们可以通过提供时间间隔来变换索引。这对于比较一个时间序列与其自身的历史数据很有用。</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 索引变换</span></span><br><span class="line"> fig = plt.figure()</span><br><span class="line"> humidity[<span class="string">&quot;Vancouver&quot;</span>].asfreq(<span class="string">&#x27;M&#x27;</span>).plot(legend = <span class="literal">True</span>)</span><br><span class="line"> shifted = humidity[<span class="string">&quot;Vancouver&quot;</span>].asfreq(<span class="string">&#x27;M&#x27;</span>).shift(<span class="number">10</span>).plot(legend = <span class="literal">True</span>)</span><br><span class="line"> shifted.legend([<span class="string">&#x27;Vancouver&#x27;</span>,<span class="string">&#x27;Vancouver_lagged&#x27;</span>])</span><br><span class="line"> fig.savefig(<span class="string">&quot;shifted.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/06.png"><br>1.8 重取样<br>上采样(Upsampling):时间序列数据从低时间频率到高时间频率采样(月到天)。这涉及到采用填充或插值的方法处理缺失数据。<br>下采样(Downsampling):时间序列数据从高时间频率到低时间频率采样(天到月)。这涉及到合并已存在的数据。</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 采用气压数据演示重采样</span></span><br><span class="line"> pressure = pd.read_csv(<span class="string">&quot;input/historical-hourly-weather-data/pressure.csv&quot;</span>, index_col = <span class="string">&quot;datetime&quot;</span>, parse_dates = [<span class="string">&quot;datetime&quot;</span>])</span><br><span class="line"> print(pressure.tail())</span><br><span class="line"> pressure = pressure.iloc[<span class="number">1</span>:]</span><br><span class="line"> <span class="comment"># 用前值填充nan</span></span><br><span class="line"> pressure = pressure.fillna(method = <span class="string">&quot;ffill&quot;</span>)</span><br><span class="line"> print(pressure.tail())</span><br><span class="line"> pressure = pressure.fillna(method = <span class="string">&quot;bfill&quot;</span>)</span><br><span class="line"> print(pressure.head())</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/07.png"><br>首先我们使用ffill参数用nan之前最后一个可用数据来填充，接着我们使用bfill用nan之后第一个可用的数据来填充。</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 输出数据规模</span></span><br><span class="line"> print(pressure.shape)</span><br></pre></td></tr></table></figure>
<p>(45252, 36)</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 使用平均数从小时数据到3天数据进行向下采样</span></span><br><span class="line">pressure = pressure.resample(<span class="string">&quot;3D&quot;</span>).mean()</span><br><span class="line">print(pressure.head())</span><br><span class="line">print(pressure.shape)</span><br></pre></td></tr></table></figure>
<p>Vancouver … Jerusalem<br> datetime …<br>2012-10-01 931.627119 … 990.525424<br>2012-10-04 1019.083333 … 990.083333 2012-10-07 1013.930556 … 989.833333 2012-10-10 1015.000000 … 987.888889 2012-10-13 1008.152778 … 990.430556<br> [5 rows x 36 columns]<br> (629, 36)</p>
<p>只剩下较少的行数了。现在我们从3天数据向每日数据进行上采样。</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 从三日数据向每日数据进行上采样</span></span><br><span class="line"> pressure = pressure.resample(<span class="string">&#x27;D&#x27;</span>).pad()</span><br><span class="line"> print(pressure.head())</span><br><span class="line"> print(pressure.shape)</span><br></pre></td></tr></table></figure>
<p>Vancouver  …   Jerusalem      datetime                 …<br>2012-10-01   931.627119  …  990.525424      2012-10-02   931.627119  …  990.525424      2012-10-03   931.627119  …  990.525424      2012-10-04  1019.083333  …  990.083333  2012-10-05  1019.083333  … 990.083333<br>[5 rows x 36 columns]<br>(1885, 36)</p>
<p>2.金融和统计学<br>2.1 改变的百分率</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 改变的百分率</span></span><br><span class="line"> fig = plt.figure()</span><br><span class="line"> google[<span class="string">&quot;Change&quot;</span>] = google.High.div(google.High.shift())</span><br><span class="line"> google[<span class="string">&quot;Change&quot;</span>].plot(figsize = (<span class="number">20</span>, <span class="number">8</span>))</span><br><span class="line"> fig.savefig(<span class="string">&quot;percent.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/08.png"><br>2.2 证券收益</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 证券收益</span></span><br><span class="line"> fig = plt.figure()</span><br><span class="line"> google[<span class="string">&quot;Return&quot;</span>] = google.Change.sub(<span class="number">1</span>).mul(<span class="number">100</span>)</span><br><span class="line"> google[<span class="string">&quot;Return&quot;</span>].plot(figsize = (<span class="number">20</span>, <span class="number">8</span>))</span><br><span class="line"> fig.savefig(<span class="string">&quot;Return1.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/09.png"></p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 另一个计算方法</span></span><br><span class="line">fig = plt.figure()</span><br><span class="line">google.High.pct_change().mul(<span class="number">100</span>).plot(figsize = (<span class="number">20</span>, <span class="number">6</span>))</span><br><span class="line">fig.savefig(<span class="string">&quot;Return2.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/10.png"></p>
<p>2.3 相继行的绝对改变(Absolute change in successive rows)</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 比较相继序列的绝对改变</span></span><br><span class="line">fig = plt.figure()</span><br><span class="line">google.High.diff().plot(figsize = (<span class="number">20</span>, <span class="number">6</span>))</span><br><span class="line">fig.savefig(<span class="string">&quot;AbsoluteChange.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/11.png"><br>2.4 比较两个或更多时间序列<br>我们将通过归一化(normalizing，下文代码注释中称为“正态化”)来比较两个时间序列。这是通过将时间序列中的每个元素除以第一个元素来实现的。这样两个时间序列都从同一个起点开始，可以更容易地比较。</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 比较两个不同的序列，微软和谷歌的股票</span></span><br><span class="line"> microsoft = pd.read_csv(<span class="string">&quot;input/stock-time-series-20050101-to-20171231/MSFT_2006-01-01_to_2018-01-01.csv&quot;</span>, index_col = <span class="string">&quot;Date&quot;</span>, parse_dates = [<span class="string">&quot;Date&quot;</span>])</span><br><span class="line"> <span class="comment"># 在正态化以前绘图</span></span><br><span class="line"> fig = plt.figure()</span><br><span class="line"> google.High.plot()</span><br><span class="line"> microsoft.High.plot()</span><br><span class="line"> plt.legend([<span class="string">&quot;Google&quot;</span>, <span class="string">&quot;Microsoft&quot;</span>])</span><br><span class="line"> fig.savefig(<span class="string">&quot;Compare.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/12.png"></p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 进行正态化并进行比较</span></span><br><span class="line"> normalized_google = google.High.div(google.High.iloc[<span class="number">0</span>]).mul(<span class="number">100</span>)</span><br><span class="line"> normalized_microsoft = microsoft.High.div(microsoft.High.iloc[<span class="number">0</span>]).mul(<span class="number">100</span>)</span><br><span class="line"> fig = plt.figure()</span><br><span class="line"> normalized_google.plot()</span><br><span class="line"> normalized_microsoft.plot()</span><br><span class="line"> plt.legend([<span class="string">&quot;Google&quot;</span>, <span class="string">&quot;Microsoft&quot;</span>])</span><br><span class="line"> fig.savefig(<span class="string">&quot;NormalizedCompare.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/13.png"><br>可以看到谷歌的股价表现好于微软数倍。<br>2.5 窗口函数<br>窗口函数用于确定子周期，并计算各子周期内的统计指标。<br>有两种:<br>Rolling 窗口大小固定，并随时间滑动<br>Expanding 包含所有之前的数据</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># Rolling窗口函数</span></span><br><span class="line"> <span class="comment"># 90日均线吧</span></span><br><span class="line"> rolling_google = google.High.rolling(<span class="string">&quot;90D&quot;</span>).mean()</span><br><span class="line"> fig = plt.figure()</span><br><span class="line"> google.High.plot()</span><br><span class="line"> rolling_google.plot()</span><br><span class="line"> plt.legend([<span class="string">&quot;High&quot;</span>, <span class="string">&quot;Rolling Mean&quot;</span>])</span><br><span class="line"> fig.savefig(<span class="string">&quot;RollongGoogle.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/14.png"></p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># Expanding窗口函数</span></span><br><span class="line"> microsoft_mean = microsoft.High.expanding().mean()</span><br><span class="line"> microsoft_std = microsoft.High.expanding().std()</span><br><span class="line"> fig = plt.figure()</span><br><span class="line"> microsoft.High.plot()</span><br><span class="line"> microsoft_mean.plot()</span><br><span class="line"> microsoft_std.plot()</span><br><span class="line"> plt.legend([<span class="string">&quot;High&quot;</span>, <span class="string">&quot;Expanding Mean&quot;</span>, <span class="string">&quot;Expanding Standard Deviation&quot;</span>])</span><br><span class="line"> fig.savefig(<span class="string">&quot;ExpandingMicrosoft.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/15.png"><br>2.6 收市价图(OHLC charts)<br>收市价图是表示一定时间周期内任何类型的价格的开盘价、最高价、最低价、以及收盘价的图形。开盘(Open)-最高(High)-最低(Low)-收盘(Close)，OHLC图用来作为一种交易工具来可视化和分析证券、外汇、股票、债券、期货等的价格随时间的变化。收市价图对于解释市场价格的每日的变化以及通过模式识别来预测未来的价格改变很有帮助。<br>收市价图的y轴用来表示价格尺度，而x轴用来表示时间尺度。在每一单独的时间周期内，一个蜡烛图用一个符号来代表两个范围：交易的最高价和最低价，以及那个时间段（例如一天）的开盘价和收盘价。在符号的范围内，最高价和最低价的范围用主要竖线的长度来代表。开盘价和收盘价用位于竖线左边（代表开盘价）和右边（代表收盘价）的刻度线来表示。<br>每个收市价图符号都有颜色，以区别是“牛市”(bullish)（收盘价比开盘价高）或者“熊市”（bearish）（收盘价比开盘价低）。（文中颜色貌似与我们的习惯是反着的，熊市是红色，牛市是绿色。——译者注）<br><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/16.png"><br>(作图这部分程序我手机上库用不了，就照搬原文了。）</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># OHLC chart of June 2008</span></span><br><span class="line">trace = go.Ohlc(x=google[<span class="string">&#x27;06-2008&#x27;</span>].index,</span><br><span class="line">                <span class="built_in">open</span>=google[<span class="string">&#x27;06-2008&#x27;</span>].Open,</span><br><span class="line">                high=google[<span class="string">&#x27;06-2008&#x27;</span>].High,</span><br><span class="line">                low=google[<span class="string">&#x27;06-2008&#x27;</span>].Low,</span><br><span class="line">                close=google[<span class="string">&#x27;06-2008&#x27;</span>].Close)</span><br><span class="line">data = [trace]</span><br><span class="line">iplot(data, filename=<span class="string">&#x27;simple_ohlc&#x27;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/17.png"></p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># OHLC chart of 2008</span></span><br><span class="line">trace = go.Ohlc(x=google[<span class="string">&#x27;2008&#x27;</span>].index,</span><br><span class="line">                <span class="built_in">open</span>=google[<span class="string">&#x27;2008&#x27;</span>].Open,</span><br><span class="line">                high=google[<span class="string">&#x27;2008&#x27;</span>].High,</span><br><span class="line">                low=google[<span class="string">&#x27;2008&#x27;</span>].Low,</span><br><span class="line">                close=google[<span class="string">&#x27;2008&#x27;</span>].Close)</span><br><span class="line">data = [trace]</span><br><span class="line">iplot(data, filename=<span class="string">&#x27;simple_ohlc&#x27;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/18.png"></p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># OHLC chart of 2006-2018</span></span><br><span class="line">trace = go.Ohlc(x=google.index,</span><br><span class="line">                <span class="built_in">open</span>=google.Open,</span><br><span class="line">                high=google.High,</span><br><span class="line">                low=google.Low,</span><br><span class="line">                close=google.Close)</span><br><span class="line">data = [trace]</span><br><span class="line">iplot(data, filename=<span class="string">&#x27;simple_ohlc&#x27;</span>)</span><br></pre></td></tr></table></figure>

<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/19.png"></p>
<p>2.7 蜡烛图<br>这种图用来作为一种交易工具可视化分析证券、衍生品、外汇、股票、债券、期货等的价格随时间的运动。尽管蜡烛图使用的符号像一个箱子的符号，但是它们的功能是不同的，不能彼此混淆。<br>蜡烛图使用一个蜡烛样的符号来显示多种价格信息，例如开盘价、收盘价、最高价和最低价等。每个符号代表一个单独时间间隔内的压缩的交易活动（一分钟、一小时、一天、一个月等）。每个蜡烛符号画在x轴上一个单独的时间尺度上，以显示那段时间的交易活动。<br>符号中间的矩形被称为实体，用来显示那段时间的开盘价与收盘价的范围。从其顶部和底部延伸出来的线段代表下影和上影（或者灯芯）。它们代表那段时间里的最低价和最高价。当为牛市时（收盘价高于开盘价），实体常为白色或绿色。当为熊市时（收盘价低于开盘价），实体被涂为黑色或红色。(还是跟我们的习惯相反）<br><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/20.png"></p>
<p>通过其颜色和形状，蜡烛图对于探测和预测市场随时间的趋势以及解释市场的每日的变动是非常好的工具。例如，实体越长，卖压或买压越大。实体越短，意味着该时间段内价格变动非常小。<br>蜡烛图通过各种指标帮助显示市场心理（交易者的恐惧或贪婪），如形状和颜色，还有一些可以从蜡烛图中识别出来的模式。总的来说，大约有42种已经被识别出来的或简单或复杂的模式。这些从蜡烛图中识别出来的模式对于显示价格关系和预测市场未来可能的变动很有帮助。这里有一些模式。<br><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/21.png"><br>请注意，蜡烛图并不表示在开盘价格和收盘价格之间发生的事件——只是关于两种价格之间的关系。因此你无法指出这个时间段内的交易的波动性。<br>来源： <a target="_blank" rel="noopener" href="https://datavizcatalogue.com/methods/candlestick_chart.html">https://datavizcatalogue.com/methods/candlestick_chart.html</a></p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># Candlestick chart of march 2008</span></span><br><span class="line">trace = go.Candlestick(x=google[<span class="string">&#x27;03-2008&#x27;</span>].index,</span><br><span class="line">                <span class="built_in">open</span>=google[<span class="string">&#x27;03-2008&#x27;</span>].Open,</span><br><span class="line">                high=google[<span class="string">&#x27;03-2008&#x27;</span>].High,</span><br><span class="line">                low=google[<span class="string">&#x27;03-2008&#x27;</span>].Low,</span><br><span class="line">                close=google[<span class="string">&#x27;03-2008&#x27;</span>].Close)</span><br><span class="line">data = [trace]</span><br><span class="line">iplot(data, filename=<span class="string">&#x27;simple_candlestick&#x27;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/22.png"></p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># Candlestick chart of 2008</span></span><br><span class="line">trace = go.Candlestick(x=google[<span class="string">&#x27;2008&#x27;</span>].index,</span><br><span class="line">                <span class="built_in">open</span>=google[<span class="string">&#x27;2008&#x27;</span>].Open,</span><br><span class="line">                high=google[<span class="string">&#x27;2008&#x27;</span>].High,</span><br><span class="line">                low=google[<span class="string">&#x27;2008&#x27;</span>].Low,</span><br><span class="line">                close=google[<span class="string">&#x27;2008&#x27;</span>].Close)</span><br><span class="line">data = [trace]</span><br><span class="line">iplot(data, filename=<span class="string">&#x27;simple_candlestick&#x27;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/23.png"></p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># Candlestick chart of 2006-2018</span></span><br><span class="line">trace = go.Candlestick(x=google.index,</span><br><span class="line">                <span class="built_in">open</span>=google.Open,</span><br><span class="line">                high=google.High,</span><br><span class="line">                low=google.Low,</span><br><span class="line">                close=google.Close)</span><br><span class="line">data = [trace]</span><br><span class="line">iplot(data, filename=<span class="string">&#x27;simple_candlestick&#x27;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/24.png"><br>2.8 自相关与部分自相关<br>自相关(Autocorrelation)——自相关函数(The autocorrelation function, ACF)测量一个序列在不同延迟(lag)上与其自身的相关性。<br>部分自相关(Partial Autocorrelation)——部分自相关函数可以被解释为该序列对其过去各个延迟的回归。其中各项的含义与标准线性回归中的系数相同，即在其它延迟保持不变的情况下，某一特定延迟的变化所带来的贡献。<br>来源：<a target="_blank" rel="noopener" href="https://www.quora.com/What-is-the-difference-among-auto-correlation-partial-auto-correlation-and-inverse-auto-correlation-while-modelling-an-ARIMA-series">https://www.quora.com/What-is-the-difference-among-auto-correlation-partial-auto-correlation-and-inverse-auto-correlation-while-modelling-an-ARIMA-series</a><br>自相关性</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 自相关</span></span><br><span class="line">fig = plt.figure()</span><br><span class="line">plot_acf(humidity[<span class="string">&quot;San Diego&quot;</span>], lags = <span class="number">25</span>, title = <span class="string">&quot;San Diego&quot;</span>)</span><br><span class="line">plt.savefig(<span class="string">&quot;acf.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/25.png"><br>由于所有延迟上的自相关系数都接近1，或者至少超出了置信区间，它们都具有统计学意义。<br>部分自相关</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 部分自相关</span></span><br><span class="line"> fig = plt.figure()</span><br><span class="line"> plot_pacf(humidity[<span class="string">&quot;San Diego&quot;</span>], lags = <span class="number">25</span>, title = <span class="string">&quot;San Diego, pacf&quot;</span>)</span><br><span class="line"> plt.savefig(<span class="string">&quot;pacf.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/26.png"><br>尽管差异有统计学意义，但在头两个延迟之后的部分自相关性非常低。</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">plot_pacf(microsoft[<span class="string">&quot;Close&quot;</span>], lags = <span class="number">25</span>)</span><br><span class="line">plt.savefig(<span class="string">&quot;ms_pacf.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/27.png"><br>这里，只有第0,1和20个延迟差异有统计学意义。</p>
<p>3.时间序列分解与随机行走<br>3.1 趋势，季节性和噪音<br>这些是一个时间序列的组成部分<br>趋势：一个时间序列的持续向上或向下的斜率<br>季节性：一个时间序列的清晰的周期性模式（就像正弦函数）<br>噪音：异常或缺失数据</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 3.时间序列分解与随机行走</span></span><br><span class="line"><span class="comment"># 趋势，季节性和噪音</span></span><br><span class="line">fig = plt.figure()</span><br><span class="line">google[<span class="string">&quot;High&quot;</span>].plot(figsize = (<span class="number">16</span>, <span class="number">8</span>))</span><br><span class="line">fig.savefig(<span class="string">&quot;google_trend.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/28.png"></p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 分解</span></span><br><span class="line">rcParams[<span class="string">&quot;figure.figsize&quot;</span>] = <span class="number">11</span>, <span class="number">9</span></span><br><span class="line">decomposed_google_volume = sm.tsa.seasonal_decompose(google[<span class="string">&quot;High&quot;</span>], freq = <span class="number">360</span>)</span><br><span class="line">fig = decomposed_google_volume.plot()</span><br><span class="line">fig.savefig(<span class="string">&quot;decomposed.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/29.png"><br>在上图中有一个清晰的向上的趋势。<br>你也可以看到规则的周期性改变<br>不规则的噪音代表着数据异常或缺失。</p>
<p>3.2 白噪音<br>白噪音具有：<br>恒定的平均值<br>恒定的方差<br>在所有延迟上的自相关均为零</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 白噪音</span></span><br><span class="line">fig = plt.figure()</span><br><span class="line">rcParams[<span class="string">&quot;figure.figsize&quot;</span>] = <span class="number">16</span>, <span class="number">6</span></span><br><span class="line">white_noise = np.random.normal(loc = <span class="number">0</span>, scale = <span class="number">1</span>, size = <span class="number">1000</span>)</span><br><span class="line">plt.plot(white_noise)</span><br><span class="line">fig.savefig(<span class="string">&quot;whitenoise.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/30.png"></p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 绘制白噪音的自相关关系</span></span><br><span class="line"> fig = plt.figure()</span><br><span class="line"> plot_acf(white_noise, lags = <span class="number">20</span>)</span><br><span class="line"> plt.savefig(<span class="string">&quot;wn_acf.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/31.png"><br>可以看到所有延迟都落在置信区间内（阴影部分），因此都没有统计学意义。</p>
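<p>3.3 随机行走<br>随机行走是一个数学概念，是一种随机过程，描述在某个数学空间（如整数）上由一连串随机步组成的路径。<br>一般来说，对于股市，今天的价格 = 昨天的价格+噪音<br>P<sub>t</sub> = P<sub>t-1</sub> + ε<sub>t</sub><br>随机行走不能被预测，因为噪音是随机的。<br>带漂移(drift)的随机行走（μ 为漂移项，噪音 ε<sub>t</sub> 的均值为零）<br>P<sub>t</sub> - P<sub>t-1</sub> = μ + ε<sub>t</sub><br>对随机行走的回归检验<br>P<sub>t</sub> = α + βP<sub>t-1</sub> + ε<sub>t</sub><br>两边同时减去 P<sub>t-1</sub>，等价于<br>P<sub>t</sub> - P<sub>t-1</sub> = α + γP<sub>t-1</sub> + ε<sub>t</sub>（其中 γ = β - 1）<br>检验（针对第一个式子）：<br>H0：β = 1（这是一个随机行走）<br>H1：β &lt; 1 （这不是一个随机行走）<br>Dickey-Fuller（DF）检验（针对差分形式）：<br>H0：γ = 0（这是一个随机行走）<br>H1：γ &lt; 0 （这不是一个随机行走）<br>增广Dickey-Fuller检验（Augmented Dickey-Fuller test，ADF，一种单位根检验）<br>ADF检验的零假设是时间序列样本存在单位根。它基本上就是在等式右边(RHS)加入了更多滞后差分项的Dickey-Fuller检验。</p>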
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 随机行走 </span></span><br><span class="line"><span class="comment"># 单位根检验谷歌和微软的成交量</span></span><br><span class="line">adf = adfuller(microsoft[<span class="string">&quot;Volume&quot;</span>])</span><br><span class="line">print(<span class="string">&quot;p-value of microsoft: &#123;&#125;&quot;</span>.<span class="built_in">format</span>(<span class="built_in">float</span>(adf[<span class="number">1</span>])))</span><br><span class="line">adf = adfuller(google[<span class="string">&quot;Volume&quot;</span>])</span><br><span class="line">print(<span class="string">&quot;p-value of google: &#123;&#125;&quot;</span>.<span class="built_in">format</span>(<span class="built_in">float</span>(adf[<span class="number">1</span>])))</span><br></pre></td></tr></table></figure>
<p>p-value of microsoft: 0.0003201525277652296<br> p-value of google: 6.510719605767195e-07<br>由于微软的p值为0.0003201525小于0.05，零假设被拒绝，这个序列不是一个随机行走序列。<br>谷歌的p值为0.0000006510小于0.05（此处原文为大于，疑有误），零假设被拒绝，这个序列不是一个随机行走序列。<br>产生一个随机行走序列</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 产生一个随机行走</span></span><br><span class="line">fig = plt.figure()</span><br><span class="line">seed(<span class="number">42</span>)</span><br><span class="line">rcParams[<span class="string">&quot;figure.figsize&quot;</span>] = <span class="number">16</span>, <span class="number">6</span></span><br><span class="line">random_walk = normal(loc = <span class="number">0</span>, scale = <span class="number">0.01</span>, size = <span class="number">1000</span>)</span><br><span class="line">plt.plot(random_walk)</span><br><span class="line">fig.savefig(<span class="string">&quot;random_walk.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/32.png"></p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line">fig = plt.figure()</span><br><span class="line">plt.hist(random_walk)</span><br><span class="line">fig.savefig(<span class="string">&quot;random_hist.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/33.png"><br>3.4 稳定性<br>一个稳定的时间序列是指其统计性质如平均值，方差，自相关性等，都不随时间变化的时间序列。<br>强稳定性：是指其概率分布绝对的不随时间变化而变化的随机过程。因此，类似平均值、方差等参数也不随时间而变化。<br>弱稳定性：是指平均值、方差、自相关性都随时间变化保持恒定的过程。<br>稳定性非常重要，因为非稳定序列有很多影响因素，在建模的时候会很复杂。diff()方法可以容易的将一个非稳定序列转化为稳定序列。<br>我们将尝试将上面的时间序列的周期性部分分解。（We will try to decompose seasonal component of the above decomposed time series.）</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 稳定性</span></span><br><span class="line"><span class="comment"># 初始的非稳定序列</span></span><br><span class="line">fig = plt.figure()</span><br><span class="line">decomposed_google_volume.trend.plot()</span><br><span class="line">fig.savefig(<span class="string">&quot;nonstationary.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/34.png"></p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 新的稳定的序列，即一阶差分</span></span><br><span class="line">fig = plt.figure()</span><br><span class="line">decomposed_google_volume.trend.diff().plot()</span><br><span class="line">fig.savefig(<span class="string">&quot;stationary.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/35.png"><br>4.使用statsmodels建模<br>4.1 AR模型<br>自回归(autoregressive, AR)模型代表了一类随机过程，用于描述自然界、经济学等领域中某些随时间变化的过程。该模型认为输出变量线性地依赖于其自身之前的取值以及一个随机项（一个无法完全预测的项）；因此模型以随机差分方程的形式出现。<br>AR(1)模型<br>R<sub>t</sub> = μ + ΦR<sub>t-1</sub> + ε<sub>t</sub><br>由于等式右边(RHS)只有一个延迟值(R<sub>t-1</sub>)，这被称为一阶AR模型，其中μ是平均值，ε<sub>t</sub>是t时刻的噪音。<br>如果Φ=1，这是随机行走。如果Φ=0，这是白噪音。如果-1&lt;Φ&lt;1，它是稳定的。如果Φ为负值，序列存在均值回归；如果Φ为正值，序列存在动量。(If ϕ is -ve, there is mean reversion. If ϕ is +ve, there is momentum.)<br>AR(2)模型<br>R<sub>t</sub> = μ + Φ<sub>1</sub>R<sub>t-1</sub> + Φ<sub>2</sub>R<sub>t-2</sub> + ε<sub>t</sub><br>AR(3)模型<br>R<sub>t</sub> = μ + Φ<sub>1</sub>R<sub>t-1</sub> + Φ<sub>2</sub>R<sub>t-2</sub> + Φ<sub>3</sub>R<sub>t-3</sub> + ε<sub>t</sub><br>AR(1)模拟模型</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 4.使用statstools建模</span></span><br><span class="line"><span class="comment"># AR(1)模型</span></span><br><span class="line"><span class="comment"># AR(1) MA(1)模型: AR参数 = 0.9</span></span><br><span class="line">fig = plt.figure()</span><br><span class="line">rcParams[<span class="string">&#x27;figure.figsize&#x27;</span>] = <span class="number">16</span>, <span class="number">12</span></span><br><span class="line">plt.subplot(<span class="number">4</span>,<span class="number">1</span>,<span class="number">1</span>)</span><br><span class="line">ar1 = np.array([<span class="number">1</span>, -<span class="number">0.9</span>])</span><br><span class="line">ma1 = np.array([<span 
class="number">1</span>])</span><br><span class="line">AR1 = ArmaProcess(ar1, ma1)</span><br><span class="line">sim1 = AR1.generate_sample(nsample = <span class="number">1000</span>)</span><br><span class="line">plt.title(<span class="string">&quot;AR(1) model : AR parameter = +0.9&quot;</span>)</span><br><span class="line">plt.plot(sim1)</span><br><span class="line"><span class="comment"># AR(1) MA(1)模型: AR参数 = -0.9</span></span><br><span class="line">plt.subplot(<span class="number">4</span>,<span class="number">1</span>,<span class="number">2</span>)</span><br><span class="line">ar2 = np.array([<span class="number">1</span>, <span class="number">0.9</span>])</span><br><span class="line">ma2 = np.array([<span class="number">1</span>])</span><br><span class="line">AR2 = ArmaProcess(ar2, ma2)</span><br><span class="line">sim2 = AR2.generate_sample(nsample = <span class="number">1000</span>)</span><br><span class="line">plt.title(<span class="string">&quot;AR(1) model : AR parameter = -0.9&quot;</span>)</span><br><span class="line">plt.plot(sim2)</span><br><span class="line"><span class="comment"># AR(2) MA(1)模型: AR参数 = 0.9</span></span><br><span class="line">plt.subplot(<span class="number">4</span>,<span class="number">1</span>,<span class="number">3</span>)</span><br><span class="line">ar3 = np.array([<span class="number">2</span>, -<span class="number">0.9</span>])</span><br><span class="line">ma3 = np.array([<span class="number">1</span>])</span><br><span class="line">AR3 = ArmaProcess(ar3, ma3)</span><br><span class="line">sim3 = AR3.generate_sample(nsample = <span class="number">1000</span>)</span><br><span class="line">plt.title(<span class="string">&quot;AR(2) model : AR parameter = +0.9&quot;</span>)</span><br><span class="line">plt.plot(sim3)</span><br><span class="line"><span class="comment"># AR(2) MA(1)模型: AR参数 = -0.9</span></span><br><span class="line">plt.subplot(<span class="number">4</span>,<span class="number">1</span>,<span 
class="number">4</span>)</span><br><span class="line">ar4 = np.array([<span class="number">2</span>, <span class="number">0.9</span>])</span><br><span class="line">ma4 = np.array([<span class="number">1</span>])</span><br><span class="line">AR4 = ArmaProcess(ar4, ma4)</span><br><span class="line">sim4 = AR4.generate_sample(nsample = <span class="number">1000</span>)</span><br><span class="line">plt.title(<span class="string">&quot;AR(2) model : AR parameter = -0.9&quot;</span>)</span><br><span class="line">plt.plot(sim4)</span><br><span class="line">fig.savefig(<span class="string">&quot;AR.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/36.png"></p>
<p>一个模拟模型的预测</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 模型预测</span></span><br><span class="line">model = ARMA(sim1, order=(<span class="number">1</span>, <span class="number">0</span>))</span><br><span class="line">result = model.fit()</span><br><span class="line">print(result.summary())</span><br><span class="line">print(<span class="string">&quot;μ = &#123;&#125;, φ = &#123;&#125;&quot;</span>.<span class="built_in">format</span>(result.params[<span class="number">0</span>], result.params[<span class="number">1</span>]))</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/37.png"><br>Φ约为0.9，是我们在第一个模拟模型中选择的AR参数。<br>预测模型</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 用模型预测</span></span><br><span class="line">fig = plt.figure()</span><br><span class="line">fig = result.plot_predict(start = <span class="number">900</span>, end = <span class="number">1010</span>)</span><br><span class="line">fig.savefig(<span class="string">&quot;AR_predict.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/38.png"></p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">rmse = math.sqrt(mean_squared_error(sim1[<span class="number">900</span>:<span class="number">1011</span>], result.predict(start = <span class="number">900</span>, end = <span class="number">999</span>)))</span><br><span class="line">print(<span class="string">&quot;The root mean squared error is &#123;&#125;.&quot;</span>.<span class="built_in">format</span>(rmse))</span><br></pre></td></tr></table></figure>
<p>The root mean squared error is 1.0408054544358292.<br>y的预测值已经画出，很整洁！</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 预测蒙特利尔的湿度</span></span><br><span class="line">humid = ARMA(humidity[<span class="string">&quot;Montreal&quot;</span>].diff().iloc[<span class="number">1</span>:].values, order = (<span class="number">1</span>, <span class="number">0</span>))</span><br><span class="line">res = humid.fit()</span><br><span class="line">fig = plt.figure()</span><br><span class="line">fig = res.plot_predict(start = <span class="number">1000</span>, end = <span class="number">1100</span>)</span><br><span class="line">fig.savefig(<span class="string">&quot;humid_arma.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/39.png"></p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">rmse = math.sqrt(mean_squared_error(humidity[<span class="string">&quot;Montreal&quot;</span>].diff().iloc[<span class="number">900</span>:<span class="number">1000</span>].values, result.predict(start=<span class="number">900</span>,end=<span class="number">999</span>)))</span><br><span class="line">print(<span class="string">&quot;The root mean squared error is &#123;&#125;.&quot;</span>.<span class="built_in">format</span>(rmse))</span><br></pre></td></tr></table></figure>
<p>The root mean squared error is 7.218388589479766.<br>不是很令人印象深刻，但让我们试试谷歌股票。</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 预测谷歌的收盘价</span></span><br><span class="line">humid = ARMA(google[<span class="string">&quot;Close&quot;</span>].diff().iloc[<span class="number">1</span>:].values, order = (<span class="number">1</span>, <span class="number">0</span>))</span><br><span class="line">res = humid.fit()</span><br><span class="line">fig = plt.figure()</span><br><span class="line">fig = res.plot_predict(start = <span class="number">900</span>, end = <span class="number">1100</span>)</span><br><span class="line">fig.savefig(<span class="string">&quot;google_arma.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/40.png"><br>总有更好的模型</p>
<p>4.2 MA模型<br>移动平均(moving average, MA)模型在单变量时间序列建模中很有用。模型假设输出变量线性地依赖于一个随机项（无法完全预测的项）的当前值及其若干过去值。<br>MA(1)模型<br>R<sub>t</sub> = μ + ε<sub>t</sub> + θε<sub>t-1</sub><br>即今日的收益 = 平均值 + 今日的噪音 + 昨日的噪音。<br>因为等式右边(RHS)中只有一个延迟项，这是一阶的MA模型。</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># MA(1)模拟模型</span></span><br><span class="line">rcParams[<span class="string">&quot;figure.figsize&quot;</span>] = <span class="number">16</span>, <span class="number">6</span></span><br><span class="line">ar1 = np.array([<span class="number">1</span>])</span><br><span class="line">ma1 = np.array([<span class="number">1</span>, -<span class="number">0.5</span>])</span><br><span class="line">MA1 = ArmaProcess(ar1, ma1)</span><br><span class="line">sim1 = MA1.generate_sample(nsample = <span class="number">1000</span>)</span><br><span class="line">plt.plot(sim1)</span><br><span class="line">plt.savefig(<span class="string">&quot;ma1.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/41.png"><br>MA模型的建模</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 建立MA模型的预测</span></span><br><span class="line">model = ARMA(sim1, order=(<span class="number">0</span>, <span class="number">1</span>))</span><br><span class="line">result = model.fit()</span><br><span class="line">print(result.summary())</span><br><span class="line">print(<span class="string">&quot;μ=&#123;&#125; ,θ=&#123;&#125;&quot;</span>.<span class="built_in">format</span>(result.params[<span class="number">0</span>],result.params[<span class="number">1</span>]))</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/42.png"><br>μ=-0.02284716848276931 ,θ=-0.5650012559991154<br>MA模型的预测</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 使用MA模型进行预测</span></span><br><span class="line">model = ARMA(humidity[<span class="string">&quot;Montreal&quot;</span>].diff().iloc[<span class="number">1</span>:].values, order=(<span class="number">0</span>, <span class="number">3</span>))</span><br><span class="line">result = model.fit()</span><br><span class="line">print(result.summary())</span><br><span class="line">print(<span class="string">&quot;μ=&#123;&#125; ,θ=&#123;&#125;&quot;</span>.<span class="built_in">format</span>(result.params[<span class="number">0</span>],result.params[<span class="number">1</span>]))</span><br><span class="line">result.plot_predict(start = <span class="number">1000</span>, end = <span class="number">1100</span>)</span><br><span class="line">plt.savefig(<span class="string">&quot;ma_forcast.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/43.png"><br><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/44.png"><br><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/45.png"></p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">rmse = math.sqrt(mean_squared_error(humidity[<span class="string">&quot;Montreal&quot;</span>].diff().iloc[<span class="number">1000</span>:<span class="number">1101</span>].values, result.predict(start=<span class="number">1000</span>,end=<span class="number">1100</span>)))</span><br><span class="line">print(<span class="string">&quot;The root mean squared error is &#123;&#125;.&quot;</span>.<span class="built_in">format</span>(rmse))</span><br></pre></td></tr></table></figure>
<p>The root mean squared error is 11.345129665763626.<br>接着，是ARMA模型<br>4.3 ARMA模型<br>自回归移动平均模型（Autoregressive–moving-average，ARMA）用两个多项式对一个（弱）稳定随机过程给出简洁的描述：一个多项式对应自回归部分，另一个对应移动平均部分。它是AR和MA模型的综合。<br>ARMA(1,1)模型<br>R<sub>t</sub> = μ + ΦR<sub>t-1</sub> + ε<sub>t</sub> + θε<sub>t-1</sub><br>基本上，它代表着今日收益 = 平均值 + 昨日的收益 + 噪音 + 昨日的噪音。<br>ARMA预测模型的建模<br>因为与AR和MA模型类似，就不进行模拟了。直接进行预测。</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 模拟和预测微软股票的市值</span></span><br><span class="line">model = ARMA(microsoft[<span class="string">&quot;Volume&quot;</span>].diff().iloc[<span class="number">1</span>:].values, order = (<span class="number">3</span>, <span class="number">3</span>))</span><br><span class="line">result = model.fit()</span><br><span class="line">print(result.summary())</span><br><span class="line">print(<span class="string">&quot;μ=&#123;&#125; ,θ=&#123;&#125;&quot;</span>.<span class="built_in">format</span>(result.params[<span class="number">0</span>],result.params[<span class="number">1</span>]))</span><br><span class="line">result.plot_predict(start = <span class="number">1000</span>, end = <span class="number">1100</span>)</span><br><span class="line">plt.savefig(<span class="string">&quot;arma_forcast.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/46.png"><br><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/47.png"><br><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/48.png"></p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">rmse = math.sqrt(mean_squared_error(microsoft[<span class="string">&quot;Volume&quot;</span>].diff().iloc[<span class="number">1000</span>:<span class="number">1101</span>].values, result.predict(start=<span class="number">1000</span>,end=<span class="number">1100</span>)))</span><br><span class="line">print(<span class="string">&quot;The root mean squared error is &#123;&#125;.&quot;</span>.<span class="built_in">format</span>(rmse))</span><br></pre></td></tr></table></figure>
<p>The root mean squared error is 38038241.66905847.<br>ARMA模型的预测结果要优于AR和MA模型</p>
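<p>4.4 ARIMA模型<br>求和自回归移动平均模型（autoregressive integrated moving average，ARIMA）是ARMA模型的一般化。这些模型都是用来拟合时间序列数据，以便更好地理解数据或者预测未来的数据。它应用在不稳定的序列数据上，通过一次或多次差分步骤（对应模型中的“求和”部分）可以消除数据的不稳定。ARIMA模型以ARIMA(p, d, q)形式表示，p是AR部分的阶数，d是差分的次数，q是MA部分的阶数。<br>ARIMA(1, 0, 0)<br>y<sub>t</sub> = a<sub>1</sub>y<sub>t-1</sub> + ε<sub>t</sub></p>
<p>ARIMA(1, 0, 1)<br>y<sub>t</sub> = a<sub>1</sub>y<sub>t-1</sub> + ε<sub>t</sub> + b<sub>1</sub>ε<sub>t-1</sub></p>
<p>ARIMA(1, 1, 1)<br>Δy<sub>t</sub> = a<sub>1</sub>Δy<sub>t-1</sub> + ε<sub>t</sub> + b<sub>1</sub>ε<sub>t-1</sub>, 其中Δy<sub>t</sub> = y<sub>t</sub> - y<sub>t-1</sub></p>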
<p>建立ARIMA的预测模型</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 使用ARIMA模型进行预测</span></span><br><span class="line"><span class="comment"># 预测微软股票的市值</span></span><br><span class="line">rcParams[<span class="string">&quot;figure.figsize&quot;</span>] = <span class="number">16</span>, <span class="number">6</span></span><br><span class="line">model = ARIMA(microsoft[<span class="string">&quot;Volume&quot;</span>].diff().iloc[<span class="number">1</span>:].values, order = (<span class="number">2</span>, <span class="number">1</span>, <span class="number">0</span>))</span><br><span class="line">result = model.fit()</span><br><span class="line">print(result.summary())</span><br><span class="line">result.plot_predict(start = <span class="number">700</span>, end = <span class="number">1000</span>)</span><br><span class="line">plt.savefig(<span class="string">&quot;Arima_predict.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/49.png"><br><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/50.png"></p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">rmse = math.sqrt(mean_squared_error(microsoft[<span class="string">&quot;Volume&quot;</span>].diff().iloc[<span class="number">700</span>:<span class="number">1001</span>].values, result.predict(start=<span class="number">700</span>,end=<span class="number">1000</span>)))</span><br><span class="line">print(<span class="string">&quot;The root mean squared error is &#123;&#125;.&quot;</span>.<span class="built_in">format</span>(rmse))</span><br></pre></td></tr></table></figure>
<p>The root mean squared error is 61937593.98493614.<br>考虑一个轻微的延迟，这是一个很好的模型。<br>4.5 VAR模型<br>向量自回归(Vector autoregression, VAR)是一个随机过程模型，用来捕捉多个时间序列之间的线性相关性。VAR模型是单变量自回归模型(AR模型)推广到多个变量的情况。在VAR模型中所有变量进入模型的途径都一致：每个变量都有一个方程，基于其自己的延迟值、其它模型变量的延迟值以及一个误差项来解释其演变。与含联立方程的结构化模型不同，VAR建模不需要太多关于影响某个变量的因素的知识：模型需要的唯一先验知识是一个变量列表，并假设其中的变量会跨期相互影响。</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># VAR模型</span></span><br><span class="line"><span class="comment"># 预测谷歌和微软的收盘价</span></span><br><span class="line">train_sample = pd.concat([google[<span class="string">&quot;Close&quot;</span>].diff().iloc[<span class="number">1</span>:], microsoft[<span class="string">&quot;Close&quot;</span>].diff().iloc[<span class="number">1</span>:]], axis=<span class="number">1</span>)</span><br><span class="line">model = sm.tsa.VARMAX(train_sample, order = (<span class="number">2</span>, <span class="number">1</span>), trend = <span class="string">&#x27;c&#x27;</span>)</span><br><span class="line">result = model.fit(maxiter = <span class="number">1000</span>, disp = <span class="literal">False</span>)</span><br><span class="line">print(result.summary())</span><br><span class="line">predicted_result = result.predict(start = <span class="number">0</span>, end = <span class="number">1000</span>)</span><br><span class="line">fig = result.plot_diagnostics()</span><br><span class="line">fig.savefig(<span class="string">&quot;Var_predict.png&quot;</span>)</span><br><span class="line"><span class="comment"># 计算误差</span></span><br><span class="line">rmse = math.sqrt(mean_squared_error(train_sample.iloc[<span class="number">1</span>:<span class="number">1002</span>].values, predicted_result.values))</span><br><span class="line">print(<span class="string">&quot;The root mean squared error is &#123;&#125;.&quot;</span>.<span 
class="built_in">format</span>(rmse))</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/51.png"><br><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/52.png"><br>4.6 状态空间模型<br>一个一般的状态空间模型的形式为：<br>y<sub>t</sub> = Z<sub>t</sub>α<sub>t</sub> + d<sub>t</sub> + ε<sub>t</sub><br>α<sub>t</sub> = T<sub>t</sub>α<sub>t-1</sub> + c<sub>t</sub> + R<sub>t</sub>η<sub>t</sub><br>其中y<sub>t</sub>代表了在时间t的观察向量，α<sub>t</sub>代表了（未观察的）在时间t的状态向量，不规则的成分定义如下：<br>ε<sub>t</sub> ~ N(0, H<sub>t</sub>)<br>η<sub>t</sub> ~ N(0, Q<sub>t</sub>)<br>方程中的其余变量(Z<sub>t</sub>, d<sub>t</sub>, H<sub>t</sub>, T<sub>t</sub>, c<sub>t</sub>, R<sub>t</sub>, Q<sub>t</sub>)是描述过程的矩阵。它们的名称和维度如下：<br>(这些就不翻译了)<br>Z : design (k_endog×k_states×nobs)<br>d : obs_intercept (k_endog×nobs)<br>H : obs_cov (k_endog×k_endog×nobs)<br>T : transition (k_states×k_states×nobs)<br>c : state_intercept (k_states×nobs)<br>R : selection (k_states×k_posdef×nobs)<br>Q : state_cov (k_posdef×k_posdef×nobs)<br>如果一个矩阵是时间不变的（例如，Z<sub>t</sub> = Z<sub>t+1</sub> ∀ t），其最后一个维度的大小可以为1，而不是观测值的数量(nobs)。<br>这个一般形式概括了许多非常流行的线性时间序列模型（如下）并且有很高的扩展性，允许在存在缺失观测值的情况下进行估计、进行预测、计算脉冲响应函数，等等。<br>来源:<a target="_blank" rel="noopener" href="https://www.statsmodels.org/dev/statespace.html">https://www.statsmodels.org/dev/statespace.html</a><br>4.6.1 SARIMA模型<br>SARIMA模型对于季节性时间序列的建模很有用，在这类序列中，给定季节的平均值和其它统计指标在各年之间并不稳定。SARIMA模型是非季节性自回归移动平均模型(ARMA)和自回归求和移动平均模型(ARIMA)的直接扩展。</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># SARIMA模型</span></span><br><span class="line"><span class="comment"># 预测谷歌的收盘价</span></span><br><span class="line">train_sample = google[<span class="string">&quot;Close&quot;</span>].diff().iloc[<span class="number">1</span>:].values</span><br><span class="line">model = sm.tsa.SARIMAX(train_sample, order = (<span class="number">4</span>, <span class="number">0</span>, <span class="number">4</span>), trend = <span class="string">&#x27;c&#x27;</span>)</span><br><span class="line">result = model.fit(maxiter = <span class="number">1000</span>, disp = <span class="literal">True</span>)</span><br><span class="line">print(result.summary())</span><br><span class="line">predicted_result = result.predict(start = <span class="number">0</span>, end = <span class="number">500</span>)</span><br><span class="line">fig = result.plot_diagnostics()</span><br><span class="line">fig.savefig(<span class="string">&quot;sarimax.png&quot;</span>)</span><br><span class="line"><span class="comment"># 计算误差</span></span><br><span class="line">rmse = math.sqrt(mean_squared_error(train_sample[<span class="number">1</span>:<span class="number">502</span>], predicted_result))</span><br><span class="line">print(<span class="string">&quot;The root mean squared error is &#123;&#125;.&quot;</span>.<span class="built_in">format</span>(rmse))</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/53.png"><br><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/54.png"></p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><span class="line">fig = plt.figure()</span><br><span class="line">plt.plot(train_sample[<span class="number">1</span>:<span class="number">502</span>], color = <span class="string">&quot;red&quot;</span>)</span><br><span class="line">plt.plot(predicted_result, color = <span class="string">&quot;blue&quot;</span>)</span><br><span class="line">plt.legend([<span class="string">&quot;Actual&quot;</span>, <span class="string">&quot;Predicted&quot;</span>])</span><br><span class="line">plt.title(<span class="string">&quot;Google closing price&quot;</span>)</span><br><span class="line">fig.savefig(<span class="string">&quot;sarimax_test.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/55.png"><br>4.6.2 未观察成分模型<br>未观察成分模型(Unobserved Components Model, UCM)将序列拆分为若干组成成分，例如趋势、季节、周期，以及由预测变量序列产生的回归效应，以预测序列。下面的模型给出了一个可能的方案。<br><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/56.png"><br>来源：<a target="_blank" rel="noopener" href="http://support.sas.com/documentation/cdl/en/etsug/66840/HTML/default/viewer.htm#etsug_ucm_details01.htm">http://support.sas.com/documentation/cdl/en/etsug/66840/HTML/default/viewer.htm#etsug_ucm_details01.htm</a></p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 未观察成分模型</span></span><br><span class="line"><span class="comment"># 预测谷歌的收盘价</span></span><br><span class="line">train_sample = google[<span class="string">&quot;Close&quot;</span>].diff().iloc[<span class="number">1</span>:].values</span><br><span class="line">model = sm.tsa.UnobservedComponents(train_sample, <span class="string">&quot;local level&quot;</span>)</span><br><span class="line">result = model.fit(maxiter = <span class="number">1000</span>, disp = <span class="literal">True</span>)</span><br><span class="line">print(result.summary())</span><br><span class="line">predicted_result = result.predict(start = <span class="number">0</span>, end = <span class="number">500</span>)</span><br><span class="line">fig = result.plot_diagnostics()</span><br><span class="line">fig.savefig(<span class="string">&quot;unobserve.png&quot;</span>)</span><br><span class="line"><span class="comment"># 计算误差</span></span><br><span class="line"><span class="comment"># rmse = math.sqrt(mean_squared_error(train_sample[1:502], predicted_result))</span></span><br><span class="line"><span class="comment"># print(&quot;The root mean squared error is &#123;&#125;.&quot;.format(rmse))</span></span><br><span class="line">fig = plt.figure()</span><br><span 
class="line">plt.plot(train_sample[<span class="number">1</span>:<span class="number">502</span>], color = <span class="string">&quot;red&quot;</span>)</span><br><span class="line">plt.plot(predicted_result, color = <span class="string">&quot;blue&quot;</span>)</span><br><span class="line">plt.legend([<span class="string">&quot;Actual&quot;</span>, <span class="string">&quot;Predicted&quot;</span>])</span><br><span class="line">plt.title(<span class="string">&quot;Google closing price&quot;</span>)</span><br><span class="line">fig.savefig(<span class="string">&quot;unobserve_test.png&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/57.png"><br><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/58.png"><br><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/59.png"><br>4.6.3 动态因子模型<br>动态因子模型是一种灵活的多变量时间序列模型，其中观测到的内生变量是外生协变量和未观测因子的线性函数，而未观测因子具有向量自回归结构。未观测因子本身也可以是外生协变量的函数。因变量方程中的扰动项可以是自相关的。</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 动态因子模型</span></span><br><span class="line"><span class="comment"># 预测谷歌的收盘价</span></span><br><span class="line">train_sample = pd.concat([google[<span class="string">&quot;Close&quot;</span>].diff().iloc[<span class="number">1</span>:], microsoft[<span class="string">&quot;Close&quot;</span>].diff().iloc[<span class="number">1</span>:]], axis=<span class="number">1</span>)</span><br><span class="line">model = sm.tsa.DynamicFactor(train_sample, k_factors=<span class="number">1</span>, factor_order=<span class="number">2</span>)</span><br><span class="line">result = model.fit(maxiter = <span class="number">1000</span>, disp = <span class="literal">True</span>)</span><br><span class="line">print(result.summary())</span><br><span class="line">predicted_result = result.predict(start = <span class="number">0</span>, end = <span class="number">1000</span>)</span><br><span class="line">fig = result.plot_diagnostics()</span><br><span class="line">fig.savefig(<span class="string">&quot;DynamicFactor.png&quot;</span>)</span><br><span class="line"><span class="comment"># 计算误差</span></span><br><span class="line"><span class="comment"># rmse = math.sqrt(mean_squared_error(train_sample[1:502], predicted_result))</span></span><br><span class="line"><span class="comment"># print(&quot;The root mean squared error is &#123;&#125;.&quot;.format(rmse))</span></span><br></pre></td></tr></table></figure>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/60.png"><br><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/blog0178-QTLearn/07/61.png"><br>我将尽快增加更多的回归模型，覆盖更多的主题。但根据我的经验，对于时间序列预测的最好模型是LSTM，它是基于循环神经网络（Recurrent Neural Networks）的。我已经为这个主题准备了一个教程，这是链接： <a target="_blank" rel="noopener" href="https://www.kaggle.com/thebrownviking20/intro-to-recurrent-neural-networks-lstm-gru">https://www.kaggle.com/thebrownviking20/intro-to-recurrent-neural-networks-lstm-gru</a></p>
<p>参考文献（有更深入的内容和解释）：</p>
<ul>
<li>Manipulating Time Series Data in Python <a target="_blank" rel="noopener" href="https://www.datacamp.com/courses/manipulating-time-series-data-in-python">https://www.datacamp.com/courses/manipulating-time-series-data-in-python</a></li>
<li>Introduction to Time Series Analysis in Python <a target="_blank" rel="noopener" href="https://www.datacamp.com/courses/introduction-to-time-series-analysis-in-python">https://www.datacamp.com/courses/introduction-to-time-series-analysis-in-python</a></li>
<li>Visualizing Time Series Data in Python <a target="_blank" rel="noopener" href="https://www.datacamp.com/courses/visualizing-time-series-data-in-python">https://www.datacamp.com/courses/visualizing-time-series-data-in-python</a></li>
<li>VAR models and LSTM <a target="_blank" rel="noopener" href="https://www.youtube.com/watch?v=_vQ0W_qXMxk">https://www.youtube.com/watch?v=_vQ0W_qXMxk</a></li>
<li>State space models <a target="_blank" rel="noopener" href="https://www.statsmodels.org/dev/statespace.html">https://www.statsmodels.org/dev/statespace.html</a></li>
</ul>
<p>敬请期待更多的内容！并且别忘了点赞和评论。</p>
<p>我发文章的四个地方，欢迎大家在朋友圈等地方分享，欢迎点“在看”。<br>我的个人博客地址：<a href="https://zwdnet.github.io/">https://zwdnet.github.io</a><br>我的知乎文章地址： <a target="_blank" rel="noopener" href="https://www.zhihu.com/people/zhao-you-min/posts">https://www.zhihu.com/people/zhao-you-min/posts</a><br>我的博客园博客地址： <a target="_blank" rel="noopener" href="https://www.cnblogs.com/zwdnet/">https://www.cnblogs.com/zwdnet/</a><br>我的微信个人订阅号：赵瑜敏的口腔医学学习园地</p>
<p><img src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/other/wx.jpg"></p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line"></span><br></pre></td></tr></table></figure>
      
    </div>
    
    
    

    

    
      <div>
        <!-- Reward widget: a button that shows or hides the payment QR codes below it. -->
        <div style="padding: 10px 0; margin: 20px auto; width: 90%; text-align: center;">
          <div>欢迎打赏！感谢支持！</div>
          <!--
            type="button" keeps this from ever acting as a submit button.
            The handler flips #QR between display "none" and "block" and mirrors
            the new state in aria-expanded so assistive technology can announce it.
          -->
          <button id="rewardButton" type="button" aria-controls="QR" aria-expanded="false" onclick="var qr = document.getElementById('QR'); var show = qr.style.display === 'none'; qr.style.display = show ? 'block' : 'none'; this.setAttribute('aria-expanded', show ? 'true' : 'false');">
            <span>打赏</span>
          </button>
          <!-- Hidden on load; the inline display value is what the handler above inspects. -->
          <div id="QR" style="display: none;">
            <div id="wechat" style="display: inline-block">
              <img id="wechat_qr" src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/other/mm_facetoface_collect_qrcode_1542944836634.png" alt="微信支付二维码"/>
              <p>微信支付</p>
            </div>
            <div id="alipay" style="display: inline-block">
              <img id="alipay_qr" src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/other/1542944857770.jpg" alt="支付宝二维码"/>
              <p>支付宝</p>
            </div>
          </div>
        </div>

      </div>
    

    

    <footer class="post-footer">
      
        <div class="post-tags">
          
            <a href="/tags/Python/" rel="tag"># Python</a>
          
            <a href="/tags/%E9%87%8F%E5%8C%96%E6%8A%95%E8%B5%84/" rel="tag"># 量化投资</a>
          
            <a href="/tags/%E6%97%B6%E9%97%B4%E5%BA%8F%E5%88%97%E5%88%86%E6%9E%90/" rel="tag"># 时间序列分析</a>
          
            <a href="/tags/kaggle/" rel="tag"># kaggle</a>
          
        </div>
      

      
      
      

      
        <div class="post-nav">
          <div class="post-nav-next post-nav-item">
            
              <a href="/2020/03/07/%E6%A5%94%E7%8A%B6%E7%BC%BA%E6%8D%9F%E7%9A%84%E5%92%AC%E5%90%88%E6%B2%BB%E7%96%97/" rel="next" title="楔状缺损的咬合治疗">
                <i class="fa fa-chevron-left"></i> 楔状缺损的咬合治疗
              </a>
            
          </div>

          <span class="post-nav-divider"></span>

          <div class="post-nav-prev post-nav-item">
            
              <a href="/2020/03/07/%E9%87%8F%E5%8C%96%E6%8A%95%E8%B5%84%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B012%E2%80%94%E2%80%94%E6%97%B6%E9%97%B4%E5%BA%8F%E5%88%97%E5%88%86%E6%9E%90%E5%AE%9E%E6%93%8D/" rel="prev" title="量化投资学习笔记12——时间序列分析实操">
                量化投资学习笔记12——时间序列分析实操 <i class="fa fa-chevron-right"></i>
              </a>
            
          </div>
        </div>
      

      
      
    </footer>
  </div>
  
  
  
  </article>



    <div class="post-spread">
      
    </div>
  </div>


          </div>
          


          

  
    <div class="comments" id="comments">
      <div id="lv-container" data-id="city" data-uid="MTAyMC80MTA2Mi8xNzU4Nw=="></div>
    </div>

  



        </div>
        
          
  
  <div class="sidebar-toggle">
    <div class="sidebar-toggle-line-wrap">
      <span class="sidebar-toggle-line sidebar-toggle-line-first"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-middle"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-last"></span>
    </div>
  </div>

  <aside id="sidebar" class="sidebar">
    
    <div class="sidebar-inner">

      

      

      <section class="site-overview-wrap sidebar-panel sidebar-panel-active">
        <div class="site-overview">
          <div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person">
            
              <img class="site-author-image" itemprop="image"
                src="https://zymblog-1258069789.cos.ap-chengdu.myqcloud.com/other/tx.jpg"
                alt="" />
            
              <p class="site-author-name" itemprop="name"></p>
              <p class="site-description motion-element" itemprop="description"></p>
          </div>

          <!-- Site statistics: post, category and tag counts, each linking to its listing page. -->
          <nav class="site-state motion-element" aria-label="Site statistics">
            <div class="site-state-item site-state-posts">
              <!--
                The href used to be "/archives/%20%7C%7C%20archive", which decodes to
                "/archives/ || archive" and is not a real page.
                NOTE(review): this looks like a "path || icon" menu setting leaking
                into the URL; confirm in the site configuration so it does not regenerate.
              -->
              <a href="/archives/">
                <span class="site-state-item-count">452</span>
                <span class="site-state-item-name">日志</span>
              </a>
            </div>

            <div class="site-state-item site-state-categories">
              <a href="/categories/index.html">
                <span class="site-state-item-count">29</span>
                <span class="site-state-item-name">分类</span>
              </a>
            </div>

            <div class="site-state-item site-state-tags">
              <a href="/tags/index.html">
                <span class="site-state-item-count">544</span>
                <span class="site-state-item-name">标签</span>
              </a>
            </div>
          </nav>

          

          

          
          

          
          

          

        </div>
      </section>

      

      

    </div>
  </aside>


        
      </div>
    </main>

    <footer id="footer" class="footer">
      <div class="footer-inner">
        <div class="copyright">&copy; <span itemprop="copyrightYear">2021</span>
  <span class="with-love">
    <i class="fa fa-user"></i>
  </span>
  <span class="author" itemprop="copyrightHolder">本站版权归赵瑜敏所有，如欲转载请与本人联系。</span>

  
    <span class="post-meta-divider">|</span>
    <span class="post-meta-item-icon">
      <i class="fa fa-area-chart"></i>
    </span>
    
      <span class="post-meta-item-text">Site words total count&#58;</span>
    
    <span title="Site words total count">1225.8k</span>
  
</div>









<div>
  <!--
    CNZZ statistics snippet (site id 1275447216).
    Picks " https://" or " http://" to match the protocol of the current page,
    then uses document.write to emit, at this position in the page:
      1. an empty span with id "cnzz_stat_icon_1275447216";
      2. a script tag loading s11.cnzz.com/z_stat.php with the query
         id=1275447216, online=1, show=line.
    The markup is percent-encoded in the source and decoded with unescape().
    document.write only works while the page is still being parsed, so this
    script must stay a plain inline script (no async or defer).
    NOTE(review): the span is presumably where the remote script renders its
    counter; confirm against the CNZZ script before moving or removing it.
  -->
  <script type="text/javascript">var cnzz_protocol = (("https:" == document.location.protocol) ? " https://" : " http://");document.write(unescape("%3Cspan id='cnzz_stat_icon_1275447216'%3E%3C/span%3E%3Cscript src='" + cnzz_protocol + "s11.cnzz.com/z_stat.php%3Fid%3D1275447216%26online%3D1%26show%3Dline' type='text/javascript'%3E%3C/script%3E"));</script>
</div>

        







  <!--
    Second CNZZ statistics script, kept in a hidden container so anything it
    renders stays invisible.
    Changes from the previous markup: the obsolete language attribute is gone,
    the ampersand in the query string is escaped, and the URL is explicit https
    instead of protocol-relative.
  -->
  <div style="display: none;">
    <script src="https://s95.cnzz.com/z_stat.php?id=1275447216&amp;web_id=1275447216"></script>
  </div>



        
      </div>
    </footer>

    
      <div class="back-to-top">
        <i class="fa fa-arrow-up"></i>
        
      </div>
    

    

  </div>

  

<script>
  // If window.Promise is not reported as a genuine function, replace it with null.
  (function (root) {
    var promiseTag = Object.prototype.toString.call(root.Promise);
    if (promiseTag === '[object Function]') { return; }
    root.Promise = null;
  })(window);
</script>









  












  
  
    <!--
      Classic scripts (no async or defer) run in document order, so the order
      below is significant and must be kept.
      NOTE(review): the jquery.* plugins presumably need jQuery loaded first.
    -->

    <!-- Libraries served from /lib/ -->
    <script src="/lib/jquery/index.js?v=2.1.3"></script>
    <script src="/lib/fastclick/lib/fastclick.min.js?v=1.0.6"></script>
    <script src="/lib/jquery_lazyload/jquery.lazyload.js?v=1.9.7"></script>
    <script src="/lib/velocity/velocity.min.js?v=1.2.1"></script>
    <script src="/lib/velocity/velocity.ui.min.js?v=1.2.1"></script>
    <script src="/lib/fancybox/source/jquery.fancybox.pack.js?v=2.1.5"></script>

    <!-- Site scripts served from /js/src/ -->
    <script src="/js/src/utils.js?v=5.1.4"></script>
    <script src="/js/src/motion.js?v=5.1.4"></script>
    <script src="/js/src/affix.js?v=5.1.4"></script>
    <script src="/js/src/schemes/pisces.js?v=5.1.4"></script>
    <script src="/js/src/scrollspy.js?v=5.1.4"></script>
    <script src="/js/src/post-details.js?v=5.1.4"></script>
    <script src="/js/src/bootstrap.js?v=5.1.4"></script>



  


  




	





  





  
    <script>
      // Inject the LiveRe embed script asynchronously, ahead of the first
      // script element in the document. Does nothing when LivereTower is
      // already defined as a function.
      (function (doc, tagName) {
        if (typeof LivereTower === 'function') { return; }
        var firstScript = doc.getElementsByTagName(tagName)[0];
        var loader = doc.createElement(tagName);
        loader.src = 'https://cdn-city.livere.com/js/embed.dist.js';
        loader.async = true;
        firstScript.parentNode.insertBefore(loader, firstScript);
      })(document, 'script');
    </script>
  












  





  

  

  

  
  

  

  

  

  
</body>
</html>
